Load

# File generated in /perturbation_16s/analysis/analysis_summer2019/generate_phyloseq.rmd
psSubj <- readRDS("../../data/16S/phyloseq/perturb_physeq_participants_decontam_15Jul19.rds")
psSubj
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 2425 taxa and 4402 samples ]
sample_data() Sample Data:       [ 4402 samples by 40 sample variables ]
tax_table()   Taxonomy Table:    [ 2425 taxa by 12 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 2425 tips and 2424 internal nodes ]
# otu_table()   OTU Table:         [ 2425 taxa and 4402 samples ]
# sample_data() Sample Data:       [ 4402 samples by 40 sample variables ]
SMP <- data.frame(sample_data(psSubj))
SUBJ <- SMP %>% select(Subject, Age:BirthYear) %>% distinct()
levels <- unique(SMP$Interval)
cols_itv <- c("grey77", colorRampPalette(brewer.pal(9, "Set1"))(length(levels) - 1))
names(cols_itv) <- c("NoInterv", setdiff(levels, "NoInterv"))
SMP %>%
    select(Group, Subject) %>%
    distinct() %>%
    group_by(Group) %>%
    summarise(subject_count = n())
load("output/pairwise_dist_subj_16S.rda")
ls()
 [1] "abx_bray_to_baseline"      "abx_intv_cols"             "abx_jacc_to_baseline"      "abx_uniFrac_to_baseline"  
 [5] "bray_to_baseline"          "bray.df"                   "brayD.ihs"                 "cc_bray_to_baseline"      
 [9] "cc_intv_cols"              "cc_jacc_to_baseline"       "cc_uniFrac_to_baseline"    "cols_itv"                 
[13] "curdir"                    "datadir"                   "diet_bray_to_baseline"     "diet_intv_cols"           
[17] "diet_jacc_to_baseline"     "diet_uniFrac_to_baseline"  "intv_cols"                 "jacc_to_baseline"         
[21] "jacc.df"                   "jaccardD"                  "levels"                    "noItv_bray_to_baseline"   
[25] "noItv_jacc_to_baseline"    "noItv_uniFrac_to_baseline" "psSubj"                    "SMP"                      
[29] "SUBJ"                      "uniFrac_to_baseline"       "uniFrac.df"                "uniFracD.lst"             

Distance to baseline

Bray-Curtis distance

Mean shift from baseline samples [-7, 0] measured with Bray-Curtis distance.

abx_bray_to_baseline <- bray.df %>%
  filter(Abx_RelDay_1 >= -7 & Abx_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Abx_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Abx_RelDay = Abx_RelDay_2) %>%
  left_join(SMP)

diet_bray_to_baseline <- bray.df %>%
  filter(Diet_RelDay_1 >= -7 & Diet_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Diet_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Diet_RelDay = Diet_RelDay_2) %>%
  left_join(SMP)

cc_bray_to_baseline <- bray.df %>%
  filter(CC_RelDay_1 >= -7 & CC_RelDay_1 <= 0) %>%
  group_by(Subject, Group, CC_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(CC_RelDay = CC_RelDay_2) %>%
  left_join(SMP)


noItv_bray_to_baseline <- bray.df %>%
  filter(Group == "NoIntv", DaysFromStart_1 >= 0, DaysFromStart_1 <= 10) %>%
  group_by(Subject, Group, DaysFromStart_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(DaysFromStart = DaysFromStart_2) %>%
  left_join(SMP) %>%
  mutate(Interval = as.character(Interval))

bray_to_baseline <- list(
  abx_bray_to_baseline %>% mutate(Interval = Abx_Interval, RelDay = Abx_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  diet_bray_to_baseline %>% mutate(Interval = Diet_Interval, RelDay = Diet_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  cc_bray_to_baseline %>% mutate(Interval = CC_Interval, RelDay = CC_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  noItv_bray_to_baseline %>% mutate(RelDay = DaysFromStart) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline)
)
names(bray_to_baseline) <- c("Abx", "Diet", "CC", "NoIntv")
bray_to_baseline <- plyr::ldply(bray_to_baseline, function(df) df, .id = "perturbation") %>%
  mutate(Interval = factor(Interval, levels = names(intv_cols)))

# dim(abx_bray_to_baseline)  
# dim(diet_bray_to_baseline) 
# dim(cc_bray_to_baseline) 
# dim(noItv_bray_to_baseline)
# head(abx_bray_to_baseline)  
# head(diet_bray_to_baseline)  
# head(cc_bray_to_baseline)  
# head(noItv_bray_to_baseline)  


save(list = c("brayD.ihs", "bray.df", "bray_to_baseline",
              "jaccardD", "jacc.df",
              "uniFracD.lst", "uniFrac.df"),
     file = "output/pairwise_dist_to_baseline_subj_16S.rda")
bray_to_baseline %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x") +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Bray-Curtis distance to baseline") 

bray_to_baseline %>%
  filter(perturbation != "NoIntv") %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x", ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Bray-Curtis distance to baseline") 

bray_to_baseline %>%
  filter(perturbation != "NoIntv", RelDay <=30, RelDay >= -30) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Bray-Curtis distance to baseline") 

abx_bray_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay >= -50) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Bray-Curtis distance to 7 pre-antibiotic samples") 

abx_bray_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay <= 30, Abx_RelDay >= -30) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.5) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Bray-Curtis distance to 7 pre-antibiotic samples") +
  ylim(0.05, 0.85)

diet_bray_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 50) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") 

diet_bray_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 30, Diet_RelDay >= -30) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") +
  ylim(0.05, 0.85)

cc_bray_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
  filter(CC_RelDay >= -50 , CC_RelDay <= 50) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Bray-Curtis distance to 7 pre-colon-cleanout samples")

cc_bray_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
 filter(CC_RelDay >= -30 , CC_RelDay <= 30) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Bray-Curtis distance to 7 pre-colon-cleanout samples") +
  ylim(0.05, 0.85)

Jaccard distance

abx_jacc_to_baseline <- jacc.df %>%
  filter(Abx_RelDay_1 >= -7 & Abx_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Abx_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Abx_RelDay = Abx_RelDay_2) %>%
  left_join(SMP)

diet_jacc_to_baseline <- jacc.df %>%
  filter(Diet_RelDay_1 >= -7 & Diet_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Diet_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Diet_RelDay = Diet_RelDay_2) %>% 
  left_join(SMP)

cc_jacc_to_baseline <- jacc.df %>%
  filter(CC_RelDay_1 >= -7 & CC_RelDay_1 <= 0) %>%
  group_by(Subject, Group, CC_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(CC_RelDay = CC_RelDay_2) %>%
  left_join(SMP)


noItv_jacc_to_baseline <- jacc.df %>%
  filter(Group == "NoIntv", DaysFromStart_1 >= 0, DaysFromStart_1 <= 10) %>%
  group_by(Subject, Group, DaysFromStart_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(DaysFromStart = DaysFromStart_2) %>%
  left_join(SMP) %>%
  mutate(Interval = as.character(Interval))

jacc_to_baseline <- list(
  abx_jacc_to_baseline %>% mutate(Interval = Abx_Interval, RelDay = Abx_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  diet_jacc_to_baseline %>% mutate(Interval = Diet_Interval, RelDay = Diet_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  cc_jacc_to_baseline %>% mutate(Interval = CC_Interval, RelDay = CC_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  noItv_jacc_to_baseline %>% mutate(RelDay = DaysFromStart) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline)
)
names(jacc_to_baseline) <- c("Abx", "Diet", "CC", "NoIntv")
jacc_to_baseline <- plyr::ldply(jacc_to_baseline, function(df) df, .id = "perturbation") %>%
  mutate(Interval = factor(Interval, levels = names(intv_cols)))

save(list = c("brayD.ihs", "bray.df", "bray_to_baseline",
              "jaccardD", "jacc.df", "jacc_to_baseline",
              "uniFracD.lst", "uniFrac.df"),
     file = "output/pairwise_dist_to_baseline_subj_16S.rda")
jacc_to_baseline %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x") +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Jaccard distance to baseline") 

jacc_to_baseline %>%
  filter(perturbation != "NoIntv") %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x", ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Jaccard distance to baseline") 

jacc_to_baseline %>%
  filter(perturbation != "NoIntv", RelDay <=30, RelDay >= -30) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Jaccard distance to baseline") 

abx_jacc_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay >= -50) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Jaccard distance to 7 pre-antibiotic samples")

abx_jacc_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay <= 30, Abx_RelDay >= -30) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.5) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Jaccard distance to 7 pre-antibiotic samples") +
  ylim(0.1, 1.0)

diet_jacc_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 50) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Jaccard distance to 7 pre-diet samples")

diet_jacc_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 30, Diet_RelDay >= -30) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Jaccard distance to 7 pre-diet samples") +
  ylim(0.1, 1.0)

cc_jacc_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
  filter(CC_RelDay >= -50 , CC_RelDay <= 50) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Jaccard distance to 7 pre-colon-cleanout samples")

cc_jacc_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
 filter(CC_RelDay >= -30 , CC_RelDay <= 30) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Jaccard distance to 7 pre-colon-cleanout samples") +
  ylim(0.1, 1.0)

(Unweighted) UniFrac

abx_uniFrac_to_baseline <- uniFrac.df %>%
  filter(Abx_RelDay_1 >= -7 & Abx_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Abx_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Abx_RelDay = Abx_RelDay_2) %>%
  left_join(SMP)

diet_uniFrac_to_baseline <- uniFrac.df %>%
  filter(Diet_RelDay_1 >= -7 & Diet_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Diet_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Diet_RelDay = Diet_RelDay_2) %>% 
  left_join(SMP)

cc_uniFrac_to_baseline <- uniFrac.df %>%
  filter(CC_RelDay_1 >= -7 & CC_RelDay_1 <= 0) %>%
  group_by(Subject, Group, CC_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(CC_RelDay = CC_RelDay_2) %>%
  left_join(SMP)


noItv_uniFrac_to_baseline <- uniFrac.df %>%
  filter(Group == "NoIntv", DaysFromStart_1 >= 0, DaysFromStart_1 <= 10) %>%
  group_by(Subject, Group, DaysFromStart_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(DaysFromStart = DaysFromStart_2) %>%
  left_join(SMP) %>%
  mutate(Interval = as.character(Interval))

uniFrac_to_baseline <- list(
  abx_uniFrac_to_baseline %>% mutate(Interval = Abx_Interval, RelDay = Abx_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  diet_uniFrac_to_baseline %>% mutate(Interval = Diet_Interval, RelDay = Diet_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  cc_uniFrac_to_baseline %>% mutate(Interval = CC_Interval, RelDay = CC_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  noItv_uniFrac_to_baseline %>% mutate(RelDay = DaysFromStart) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline)
)
names(uniFrac_to_baseline) <- c("Abx", "Diet", "CC", "NoIntv")
uniFrac_to_baseline <- plyr::ldply(uniFrac_to_baseline, function(df) df, .id = "perturbation") %>%
  mutate(Interval = factor(Interval, levels = names(intv_cols)))

save(list = c("brayD.ihs", "bray.df", "bray_to_baseline",
              "jaccardD", "jacc.df", "jacc_to_baseline",
              "uniFracD.lst", "uniFrac.df", "uniFrac_to_baseline"),
     file = "output/pairwise_dist_to_baseline_subj_16S.rda")
uniFrac_to_baseline %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x") +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("UniFrac distance to baseline") 

uniFrac_to_baseline %>%
  filter(perturbation != "NoIntv") %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x", ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("UniFrac distance to baseline") 

uniFrac_to_baseline %>%
  filter(perturbation != "NoIntv", RelDay <=30, RelDay >= -30) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("UniFrac distance to baseline") 

abx_uniFrac_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay >= -50) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("UniFrac distance to 7 pre-antibiotic samples")

abx_uniFrac_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay <= 30, Abx_RelDay >= -30) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.5) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("UniFrac distance to 7 pre-antibiotic samples") +
  ylim(0.1, 1.0)

diet_uniFrac_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 50) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  diet initiation") +
  ylab("UniFrac distance to 7 pre-diet samples")

diet_uniFrac_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 30, Diet_RelDay >= -30) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("UniFrac distance to 7 pre-diet samples") +
  ylim(0.1, 1.0)

cc_uniFrac_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
  filter(CC_RelDay >= -50 , CC_RelDay <= 50) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("UniFrac distance to 7 pre-colon-cleanout samples")

cc_uniFrac_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
 filter(CC_RelDay >= -30 , CC_RelDay <= 30) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("UniFrac distance to 7 pre-colon-cleanout samples") +
  ylim(0.1, 1.0)

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] decontam_1.2.1       rmarkdown_1.14       BiocStyle_2.10.0     forcats_0.4.0        stringr_1.4.0        dplyr_0.8.3         
 [7] purrr_0.3.2          readr_1.3.1          tidyr_0.8.3          tibble_2.1.3         ggplot2_3.2.0        tidyverse_1.2.1.9000
[13] RColorBrewer_1.1-2   phyloseq_1.26.1     

loaded via a namespace (and not attached):
 [1] nlme_3.1-140        fs_1.3.1            lubridate_1.7.4     httr_1.4.0          tools_3.5.1         backports_1.1.4    
 [7] R6_2.4.0            vegan_2.5-5         DBI_1.0.0           lazyeval_0.2.2      BiocGenerics_0.28.0 mgcv_1.8-28        
[13] colorspace_1.4-1    permute_0.9-5       ade4_1.7-13         withr_2.1.2         tidyselect_0.2.5    compiler_3.5.1     
[19] cli_1.1.0           rvest_0.3.4         Biobase_2.42.0      xml2_1.2.0          labeling_0.3        scales_1.0.0       
[25] digest_0.6.20       XVector_0.22.0      base64enc_0.1-3     pkgconfig_2.0.2     htmltools_0.3.6     dbplyr_1.4.2       
[31] highr_0.8           rlang_0.4.0         readxl_1.3.1        rstudioapi_0.10     generics_0.0.2      jsonlite_1.6       
[37] magrittr_1.5        biomformat_1.10.1   Matrix_1.2-17       Rcpp_1.0.1          munsell_0.5.0       S4Vectors_0.20.1   
[43] Rhdf5lib_1.4.3      ape_5.3             stringi_1.4.3       yaml_2.2.0          MASS_7.3-51.4       zlibbioc_1.28.0    
[49] rhdf5_2.26.2        plyr_1.8.4          grid_3.5.1          parallel_3.5.1      crayon_1.3.4        lattice_0.20-38    
[55] Biostrings_2.50.2   haven_2.1.1         splines_3.5.1       multtest_2.38.0     hms_0.5.0           zeallot_0.1.0      
[61] knitr_1.23          pillar_1.4.2        igraph_1.2.4.1      reshape2_1.4.3      codetools_0.2-16    stats4_3.5.1       
[67] reprex_0.3.0        glue_1.3.1          evaluate_0.14       data.table_1.12.2   BiocManager_1.30.4  modelr_0.1.4       
[73] vctrs_0.2.0         foreach_1.4.4       cellranger_1.1.0    gtable_0.3.0        assertthat_0.2.1    xfun_0.8           
[79] broom_0.5.2         survival_2.44-1.1   viridisLite_0.3.0   iterators_1.0.10    IRanges_2.16.0      cluster_2.1.0      
---
title: "Response to perturbations"
output: html_notebook
---


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library("phyloseq")
library("RColorBrewer")
library("tidyverse")
datadir <- "../../data/"
curdir <- getwd()
theme_set(theme_bw())
theme_update(text = element_text(20))

abx_intv_cols <- c("PreAbx" = "grey60", "MidAbx" = "#E41A1C", 
                   "PostAbx" = "#00BFC4", "UnpAbx" = "Purple")

diet_intv_cols <- c("PreDiet" = "grey60", "MidDiet" = "#FD8D3C", "PostDiet" = "#4DAF4A")
cc_intv_cols <- c("PreCC" = "grey60", "PostCC" = "#7A0177") #"#AE017E")

intv_cols <- c(abx_intv_cols, diet_intv_cols, cc_intv_cols, "NoInterv" = "grey60")
```



# Load 

```{r}
# File generated in /perturbation_16s/analysis/analysis_summer2019/generate_phyloseq.rmd
psSubj <- readRDS("../../data/16S/phyloseq/perturb_physeq_participants_decontam_15Jul19.rds")
psSubj
# otu_table()   OTU Table:         [ 2425 taxa and 4402 samples ]
# sample_data() Sample Data:       [ 4402 samples by 40 sample variables ]
SMP <- data.frame(sample_data(psSubj))
SUBJ <- SMP %>% select(Subject, Age:BirthYear) %>% distinct()
```

```{r}
SMP %>%
    select(Group, Subject) %>%
    distinct() %>%
    group_by(Group) %>%
    summarise(subject_count = n())
```

```{r}
load("output/pairwise_dist_subj_16S.rda")
ls()
```


# Distance to baseline

## Bray-Curtis distance

Mean shift from baseline samples [-7, 0] measured with Bray-Curtis distance.

```{r, eval = FALSE}
abx_bray_to_baseline <- bray.df %>%
  filter(Abx_RelDay_1 >= -7 & Abx_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Abx_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Abx_RelDay = Abx_RelDay_2) %>%
  left_join(SMP)

diet_bray_to_baseline <- bray.df %>%
  filter(Diet_RelDay_1 >= -7 & Diet_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Diet_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Diet_RelDay = Diet_RelDay_2) %>%
  left_join(SMP)

cc_bray_to_baseline <- bray.df %>%
  filter(CC_RelDay_1 >= -7 & CC_RelDay_1 <= 0) %>%
  group_by(Subject, Group, CC_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(CC_RelDay = CC_RelDay_2) %>%
  left_join(SMP)


noItv_bray_to_baseline <- bray.df %>%
  filter(Group == "NoIntv", DaysFromStart_1 >= 0, DaysFromStart_1 <= 10) %>%
  group_by(Subject, Group, DaysFromStart_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(DaysFromStart = DaysFromStart_2) %>%
  left_join(SMP) %>%
  mutate(Interval = as.character(Interval))

bray_to_baseline <- list(
  abx_bray_to_baseline %>% mutate(Interval = Abx_Interval, RelDay = Abx_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  diet_bray_to_baseline %>% mutate(Interval = Diet_Interval, RelDay = Diet_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  cc_bray_to_baseline %>% mutate(Interval = CC_Interval, RelDay = CC_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  noItv_bray_to_baseline %>% mutate(RelDay = DaysFromStart) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline)
)
names(bray_to_baseline) <- c("Abx", "Diet", "CC", "NoIntv")
bray_to_baseline <- plyr::ldply(bray_to_baseline, function(df) df, .id = "perturbation") %>%
  mutate(Interval = factor(Interval, levels = names(intv_cols)))

# dim(abx_bray_to_baseline)  
# dim(diet_bray_to_baseline) 
# dim(cc_bray_to_baseline) 
# dim(noItv_bray_to_baseline)
# head(abx_bray_to_baseline)  
# head(diet_bray_to_baseline)  
# head(cc_bray_to_baseline)  
# head(noItv_bray_to_baseline)  


save(list = c("brayD.ihs", "bray.df", "bray_to_baseline",
              "jaccardD", "jacc.df",
              "uniFracD.lst", "uniFrac.df"),
     file = "output/pairwise_dist_to_baseline_subj_16S.rda")

```


```{r bray-to-baseline, fig.width=8}
bray_to_baseline %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x") +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Bray-Curtis distance to baseline") 

```

```{r bray-to-baseline-perturb, fig.width=10, fig.height=4}
bray_to_baseline %>%
  filter(perturbation != "NoIntv") %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x", ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Bray-Curtis distance to baseline") 

```

```{r bray-to-baseline-perturb-zoom, fig.width=10, fig.height=4}
bray_to_baseline %>%
  filter(perturbation != "NoIntv", RelDay <=30, RelDay >= -30) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Bray-Curtis distance to baseline") 

```

```{r abx-bray-to-baseline, fig.width=8}
abx_bray_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay >= -50) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Bray-Curtis distance to 7 pre-antibiotic samples") 
```


```{r abx-bray-to-baseline-zoom, fig.width=8}
abx_bray_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay <= 30, Abx_RelDay >= -30) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.5) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Bray-Curtis distance to 7 pre-antibiotic samples") +
  ylim(0.05, 0.85)

```

```{r diet-bray-to-baseline, fig.width=8}

diet_bray_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 50) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") 

```


```{r diet-bray-to-baseline-zoom, fig.width=8}
diet_bray_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 30, Diet_RelDay >= -30) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") +
  ylim(0.05, 0.85)

```

```{r cc-bray-to-baseline, fig.width=8}
cc_bray_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
  filter(CC_RelDay >= -50 , CC_RelDay <= 50) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Bray-Curtis distance to 7 pre-colon-cleanout samples")

```

```{r cc-bray-to-baseline-zoom, fig.width=8}
cc_bray_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
 filter(CC_RelDay >= -30 , CC_RelDay <= 30) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Bray-Curtis distance to 7 pre-colon-cleanout samples") +
  ylim(0.05, 0.85)

```


## Jaccard distance

```{r, eval = FALSE}
abx_jacc_to_baseline <- jacc.df %>%
  filter(Abx_RelDay_1 >= -7 & Abx_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Abx_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Abx_RelDay = Abx_RelDay_2) %>%
  left_join(SMP)

diet_jacc_to_baseline <- jacc.df %>%
  filter(Diet_RelDay_1 >= -7 & Diet_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Diet_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Diet_RelDay = Diet_RelDay_2) %>% 
  left_join(SMP)

cc_jacc_to_baseline <- jacc.df %>%
  filter(CC_RelDay_1 >= -7 & CC_RelDay_1 <= 0) %>%
  group_by(Subject, Group, CC_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(CC_RelDay = CC_RelDay_2) %>%
  left_join(SMP)


noItv_jacc_to_baseline <- jacc.df %>%
  filter(Group == "NoIntv", DaysFromStart_1 >= 0, DaysFromStart_1 <= 10) %>%
  group_by(Subject, Group, DaysFromStart_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(DaysFromStart = DaysFromStart_2) %>%
  left_join(SMP) %>%
  mutate(Interval = as.character(Interval))

jacc_to_baseline <- list(
  abx_jacc_to_baseline %>% mutate(Interval = Abx_Interval, RelDay = Abx_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  diet_jacc_to_baseline %>% mutate(Interval = Diet_Interval, RelDay = Diet_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  cc_jacc_to_baseline %>% mutate(Interval = CC_Interval, RelDay = CC_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  noItv_jacc_to_baseline %>% mutate(RelDay = DaysFromStart) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline)
)
names(jacc_to_baseline) <- c("Abx", "Diet", "CC", "NoIntv")
jacc_to_baseline <- plyr::ldply(jacc_to_baseline, function(df) df, .id = "perturbation") %>%
  mutate(Interval = factor(Interval, levels = names(intv_cols)))

save(list = c("brayD.ihs", "bray.df", "bray_to_baseline",
              "jaccardD", "jacc.df", "jacc_to_baseline",
              "uniFracD.lst", "uniFrac.df"),
     file = "output/pairwise_dist_to_baseline_subj_16S.rda")

```

```{r jacc-to-baseline, fig.width=8}
jacc_to_baseline %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x") +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Jaccard distance to baseline") 

```

```{r jacc-to-baseline-perturb, fig.width=10, fig.height=4}
jacc_to_baseline %>%
  filter(perturbation != "NoIntv") %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x", ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Jaccard distance to baseline") 

```

```{r jacc-to-baseline-perturb-zoom, fig.width=10, fig.height=4}
jacc_to_baseline %>%
  filter(perturbation != "NoIntv", RelDay <=30, RelDay >= -30) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("Jaccard distance to baseline") 

```


```{r abx-jacc-to-baseline, fig.width=8}
abx_jacc_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay >= -50) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Jaccard distance to 7 pre-antibiotic samples")
```


```{r abx-jacc-to-baseline-zoom, fig.width=8}
abx_jacc_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay <= 30, Abx_RelDay >= -30) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.5) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Jaccard distance to 7 pre-antibiotic samples") +
  ylim(0.1, 1.0)

```

```{r diet-jacc-to-baseline, fig.width=8}

diet_jacc_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 50) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Jaccard distance to 7 pre-diet samples")

```


```{r diet-jacc-to-baseline-zoom, fig.width=8}
diet_jacc_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 30, Diet_RelDay >= -30) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Jaccard distance to 7 pre-diet samples") +
  ylim(0.1, 1.0)

```

```{r cc-jacc-to-baseline, fig.width=8}
cc_jacc_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
  filter(CC_RelDay >= -50 , CC_RelDay <= 50) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Jaccard distance to 7 pre-colon-cleanout samples")

```

```{r cc-jacc-to-baseline-zoom, fig.width=8}
cc_jacc_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
 filter(CC_RelDay >= -30 , CC_RelDay <= 30) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("Jaccard distance to 7 pre-colon-cleanout samples") +
  ylim(0.1, 1.0)

```


## (Unweighted) UniFrac


```{r, eval = FALSE}
abx_uniFrac_to_baseline <- uniFrac.df %>%
  filter(Abx_RelDay_1 >= -7 & Abx_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Abx_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Abx_RelDay = Abx_RelDay_2) %>%
  left_join(SMP)

diet_uniFrac_to_baseline <- uniFrac.df %>%
  filter(Diet_RelDay_1 >= -7 & Diet_RelDay_1 <= 0) %>%
  group_by(Subject, Group, Diet_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(Diet_RelDay = Diet_RelDay_2) %>% 
  left_join(SMP)

cc_uniFrac_to_baseline <- uniFrac.df %>%
  filter(CC_RelDay_1 >= -7 & CC_RelDay_1 <= 0) %>%
  group_by(Subject, Group, CC_RelDay_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(CC_RelDay = CC_RelDay_2) %>%
  left_join(SMP)


noItv_uniFrac_to_baseline <- uniFrac.df %>%
  filter(Group == "NoIntv", DaysFromStart_1 >= 0, DaysFromStart_1 <= 10) %>%
  group_by(Subject, Group, DaysFromStart_2) %>%
  summarize(dist_to_baseline = mean(dist)) %>%
  rename(DaysFromStart = DaysFromStart_2) %>%
  left_join(SMP) %>%
  mutate(Interval = as.character(Interval))

uniFrac_to_baseline <- list(
  abx_uniFrac_to_baseline %>% mutate(Interval = Abx_Interval, RelDay = Abx_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  diet_uniFrac_to_baseline %>% mutate(Interval = Diet_Interval, RelDay = Diet_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  cc_uniFrac_to_baseline %>% mutate(Interval = CC_Interval, RelDay = CC_RelDay) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline),
  noItv_uniFrac_to_baseline %>% mutate(RelDay = DaysFromStart) %>%
    select(Meas_ID, Subject, Group, Interval, RelDay, dist_to_baseline)
)
names(uniFrac_to_baseline) <- c("Abx", "Diet", "CC", "NoIntv")
uniFrac_to_baseline <- plyr::ldply(uniFrac_to_baseline, function(df) df, .id = "perturbation") %>%
  mutate(Interval = factor(Interval, levels = names(intv_cols)))

save(list = c("brayD.ihs", "bray.df", "bray_to_baseline",
              "jaccardD", "jacc.df", "jacc_to_baseline",
              "uniFracD.lst", "uniFrac.df", "uniFrac_to_baseline"),
     file = "output/pairwise_dist_to_baseline_subj_16S.rda")

```

```{r uniFrac-to-baseline, fig.width=8}
uniFrac_to_baseline %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x") +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("UniFrac distance to baseline") 

```

```{r uniFrac-to-baseline-perturb, fig.width=10, fig.height=4}
uniFrac_to_baseline %>%
  filter(perturbation != "NoIntv") %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, scales = "free_x", ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("UniFrac distance to baseline") 

```

```{r uniFrac-to-baseline-perturb-zoom, fig.width=10, fig.height=4}
uniFrac_to_baseline %>%
  filter(perturbation != "NoIntv", RelDay <=30, RelDay >= -30) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1) + 
  scale_color_manual(values = intv_cols) + 
  theme(legend.position = "bottom") + 
  facet_wrap(~ perturbation, ncol = 3) +
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  perturbation") +
  ylab("UniFrac distance to baseline") 

```


```{r abx-uniFrac-to-baseline, fig.width=8}
abx_uniFrac_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay >= -50) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("UniFrac distance to 7 pre-antibiotic samples")
```


```{r abx-uniFrac-to-baseline-zoom, fig.width=8}
abx_uniFrac_to_baseline %>%
  mutate(Abx_Interval = factor(Abx_Interval, level = names(abx_intv_cols))) %>%
  filter(Abx_RelDay <= 30, Abx_RelDay >= -30) %>%
  ggplot(
    aes(x = Abx_RelDay, y = dist_to_baseline, 
        group = Subject, color = Abx_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.5) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("UniFrac distance to 7 pre-antibiotic samples") +
  ylim(0.1, 1.0)

```

```{r diet-uniFrac-to-baseline, fig.width=8}

diet_uniFrac_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 50) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from  diet initiation") +
  ylab("UniFrac distance to 7 pre-diet samples")

```


```{r diet-uniFrac-to-baseline-zoom, fig.width=8}
diet_uniFrac_to_baseline %>%
  mutate(Diet_Interval = factor(Diet_Interval, level = names(diet_intv_cols))) %>%
  filter(Diet_RelDay <= 30, Diet_RelDay >= -30) %>%
  ggplot(
    aes(x = Diet_RelDay, y = dist_to_baseline, 
        group = Subject, color = Diet_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("UniFrac distance to 7 pre-diet samples") +
  ylim(0.1, 1.0)

```

```{r cc-uniFrac-to-baseline, fig.width=8}
cc_uniFrac_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
  filter(CC_RelDay >= -50 , CC_RelDay <= 50) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("UniFrac distance to 7 pre-colon-cleanout samples")

```

```{r cc-uniFrac-to-baseline-zoom, fig.width=8}
cc_uniFrac_to_baseline %>%
  mutate(CC_Interval = factor(CC_Interval, level = names(cc_intv_cols))) %>%
 filter(CC_RelDay >= -30 , CC_RelDay <= 30) %>%
  ggplot(
    aes(x = CC_RelDay, y = dist_to_baseline, 
        group = Subject, color = CC_Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.7, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from colon cleanout") +
  ylab("UniFrac distance to 7 pre-colon-cleanout samples") +
  ylim(0.1, 1.0)

```




```{r}
sessionInfo()
```

